* Create mover data

	frames reset 
	use "${clean_data}/panel_physicians_clean.dta", clear
	
	* Restrict to age 40-55
	loc agerange = "40,55" 
	
	* Define and count moves in age-range 40-55
	xtset npi year
	gen moved = cz90 != L.cz90 if cz90 != . & L.cz90 !=. & inrange(age, `agerange') // Accounting for age-range
	gegen moves = sum(moved), by(id)

	* Creates flags for never movers and single movers 
	* (omitted categories - do not observe consecutive years, or observe
	* more than one move)
	gen mover_type = "Never Movers" if moves == 0
	replace mover_type = "Single Movers" if moves == 1
	keep if inlist(mover_type, "Never Movers", "Single Movers")
	drop moves
	
	* Creating relative year variable
	gen temp = year if  moved == 1
	gegen moveyear = max(temp), by(id)
	drop temp

	gen relyear = . 
	replace relyear = 0 if mover_type == "Never Movers"	
	replace relyear = year - moveyear if mover_type == "Single Movers" 
	replace relyear=relyear+16 // +16 to avoid negative factor variables	

	* Generating indicator for ranked med. school 
	merge m:1 med_sch using "${intermediate_data}/medical_school_ranking/medical_school_ranks.dta", ///
		keep(master match) keepusing(newsrank*) nogen
	
	gegen meanrankedmed = rowmean(newsrank*) 
	gen toprankedmed = 1 if meanrankedmed > 0 & !mi(meanrankedmed)
	replace toprankedmed = 0 if mi(toprankedmed) & !mi(med_sch)	
	
	gsort id year
	assert !missing(mover_type)
	gisid id year
	gisid id relyear if mover_type == "Single Movers"
	assert relyear==16 if mover_type == "Never Movers"
	
	* Save mover data
	save "${intermediate_data}/twfe/physicians_movers.dta", replace

	keep if inrange(age, `agerange')	
	keep id npi year cz90 age spec_category_code spec_category_desc logptotinc ///
	toprankedmed relyear moveyear mover_type

* Regressions 

	frame create eventstudy
	
	foreach x in ALL SPECIAL PCPDOCS RESTRICSPEC RANKED NONRANKED {
		
		if "`x'" == "ALL" 		local restriction "!mi(npi)"
		else if "`x'" == "SPECIAL"	local restriction "spec_category_code != 1"
		else if "`x'" == "PCPDOCS"	local restriction "spec_category_code == 1"
		else if "`x'" == "RESTRICSPEC"	local restriction "inlist(spec_category_code, 2, 3, 4, 5, 7, 8)"
		else if "`x'" == "RANKED"	local restriction "toprankedmed == 1"
		else if "`x'" == "NONRANKED"	local restriction "toprankedmed == 0"
		
		
		* CZ-level income exposure variable
		
		if inlist("`x'", "ALL", "RANKED", "NONRANKED") {
			
			gegen czlogptotinc = mean(logptotinc), by(cz90)

			xtset id year 

			gen temp = czlogptotinc - L.czlogptotinc
			egen incdiff_pos = max(temp), by(id)
			egen incdiff_neg = min(temp), by(id)
			gen incdiff=incdiff_pos if incdiff_pos!=0
			replace incdiff=incdiff_neg if incdiff_neg!=0
			drop temp incdiff_pos incdiff_neg
		}
				
		else if inlist("`x'", "SPECIAL", "PCPDOCS", "RESTRICSPEC") {
			
			gegen czlogptotinc = mean(logptotinc) if `restriction', by(cz90) 

			xtset id year 
			
			gen temp = czlogptotinc - L.czlogptotinc if `restriction'
			egen incdiff_pos = max(temp), by(id)
			egen incdiff_neg = min(temp), by(id)
			gen incdiff=incdiff_pos if incdiff_pos!=0
			replace incdiff=incdiff_neg if incdiff_neg!=0
			drop temp incdiff_pos incdiff_neg
		}
		
		* - (1) Event Study

		reghdfe logptotinc i(10/14 16/22).relyear#c.incdiff i(10/14 16/22).relyear if  `restriction' & inrange(relyear, 10, 22) & mover_type == "Single Movers", a(id age) cluster(cz90 year)
		local N_UR = e(N)

		qui sum logptotinc if e(sample)
			local lhs_mean = `r(mean)'
			local lhs_sd = `r(sd)'
			
			
		preserve
			egen flag=  seq() if e(sample), by(npi) 
			count if flag == 1 
			local unique_phys = r(N)
		restore
		
		preserve

			parmest, norestore
			keep if regexm(parm, "#")
			gen relyear = real(substr(parm,1,2)) - 16
			set obs `=_N + 1'
			replace estimate = 0 in `=_N'
			replace min95 = 0 in `=_N'
			replace max95 = 0 in `=_N'
			replace relyear = -1 in `=_N'
			sort relyear
			gen N_UR = `N_UR'
	
			if inlist("`x'", "ALL", "RANKED", "NONRANKED","SPECIAL", "PCPDOCS" ) {
				gen depvar = "logptotinc"
				gen agerange = "age4055"
				gen sample = "`x'"
				rename parm indepvar
				
				tempfile es_`x'
				save `es_`x'', replace	
				
				frame eventstudy: append using `es_`x''
				drop depvar agerange sample N_UR
			}
			
			* Output regression results
			
			gen unique_phys = `unique_phys'
			gen mean_LHS = `lhs_mean'
			gen sd_LHS = `lhs_sd'
			
			keep relyear estimate stderr mean_LHS sd_LHS unique_phys
			order relyear estimate stderr mean_LHS sd_LHS unique_phys
			
			save "${output}/twfe/geo_twfe/physicians/geotwfe02-eventstudy_`x'_physicians.dta", replace
		
		restore
		
		* - (2) Two-Way Fixed Effects
		
		preserve
		
			keep if mover_type == "Single Movers"
			reghdfe logptotinc if `restriction', a(cz90 age id relyear, savefe) resid
			local N_UR = e(N)

			keep if e(sample)
			
			keep logptotinc cz90 spec_category_code spec_category_desc id year __hdfe* _reghdfe_resid 
			rename (__hdfe1__ __hdfe2__ __hdfe3__ __hdfe4__)  (__hdfePLACE__ __hdfeAGE__ __hdfeINDIVID__ __hdfeRELYEAR__)
			g N_UR = `N_UR'
			save "${output}/twfe/geo_twfe/physicians/IndividFixedEffects_`x'_physicians", replace

			gcollapse (mean) logptotinc __hdfe*__ N_UR, by(cz90)

			save "${output}/twfe/geo_twfe/physicians/CZFixedEffects_`x'_physicians", replace
		
		restore
		
		drop czlogptotinc* incdiff
	}
	
	* Export data
	frame change eventstudy
	keep depvar agerange indepvar estimate relyear min95 max95  sample N_UR
	drbest_docinc estimate min95 max95 
	order N_UR, last 
	export delimited using "${mypath}/intermediate_csv/geotwfe01-eventstudy_ALL_physicians.csv", replace dataf delim(tab)

********* Process results of Two-Way Fixed Effects Regressions 

	foreach x in ALL SPECIAL PCPDOCS RESTRICSPEC RANKED NONRANKED {
		
		use "${output}/twfe/geo_twfe/physicians/IndividFixedEffects_`x'_physicians", clear

		local N_UR = `=N_UR[1]'
		
		rename  (__hdfePLACE__ __hdfeAGE__ __hdfeINDIVID__ __hdfeRELYEAR__) (__hdfe1__ __hdfe2__ __hdfe3__ __hdfe4__) 

		preserve
		
			foreach y in 1 2 3 4 {
				qui sum __hdfe`y'__
				gen sd_hdfe`y' = `r(sd)'
				gen CoVar_hdfe`y' = `r(Var)'

				foreach z in 1 2 3 4 {
					if `y' < `z' {
						qui corr __hdfe`y'__ __hdfe`z'__
						gen  Corr_hdfe`y'`z' = r(C)[1,2]

						qui corr __hdfe`y'__ __hdfe`z'__, cov
						gen CoVar_hdfe`y'`z' = `r(cov_12)'
					}
				}
			}

			* Variance of the residual
			qui sum _reghdfe_resid 
			gen CoVar_resid = `r(Var)'

			gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

			gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

			gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + CoVar_hdfe3 + CoVar_hdfe4 ///
				+ 2*CoVar_hdfe12 + 2*CoVar_hdfe13 + 2*CoVar_hdfe14 + 2*CoVar_hdfe23 + 2*CoVar_hdfe24 + 2*CoVar_hdfe34 + CoVar_resid

			gen sd_recomposed = sqrt(CoVar_recomposed)

			gen spec = "Person-Year Level"

			save "${temp}/personyear_`x'_physicians", replace
	
		restore

		gcollapse logptotinc __hdfe* _reghdfe_resid, by(cz90)

		foreach y in 1 2 3 4 {
			qui sum __hdfe`y'__, d
			gen sd_hdfe`y' = `r(sd)'
			gen CoVar_hdfe`y' = `r(Var)'

			foreach z in 1 2 3 4 {
				if `y' < `z' {
					qui corr __hdfe`y'__ __hdfe`z'__
					gen  Corr_hdfe`y'`z' = r(C)[1,2]

					qui corr __hdfe`y'__ __hdfe`z'__, cov
					gen CoVar_hdfe`y'`z' = `r(cov_12)'
				}
			}
		}

		* Variance of the residual
		qui sum _reghdfe_resid 
		gen CoVar_resid = `r(Var)'

		gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

		gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

		gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + CoVar_hdfe3 + CoVar_hdfe4 ///
			+ 2*CoVar_hdfe12 + 2*CoVar_hdfe13 + 2*CoVar_hdfe14 + 2*CoVar_hdfe23 + 2*CoVar_hdfe24 + 2*CoVar_hdfe34 + CoVar_resid

		gen sd_recomposed = sqrt(CoVar_recomposed)

		gen spec = "CZ Level"

		append using "${temp}/personyear_`x'_physicians"
		
		reshape long sd_ CoVar_ Corr_, i(spec) j(effect) string
		
		replace effect = subinstr(effect, "12", "Place/Age", .)
		replace effect = subinstr(effect, "13", "Place/Individ", .)
		replace effect = subinstr(effect, "14", "Place/RelYear", .)
		replace effect = subinstr(effect, "23", "Age/Individ", .)
		replace effect = subinstr(effect, "24", "Age/RelYear", .)
		replace effect = subinstr(effect, "34", "Individ/RelYear", .)

		replace effect = subinstr(effect, "1", "Place", .)
		replace effect = subinstr(effect, "2", "Age", .)
		replace effect = subinstr(effect, "3", "Individ", .)
		replace effect = subinstr(effect, "4", "RelYear", .)
		replace effect = subinstr(effect, "hdfe", "", .)
		
		gen id = . 
		replace id = 1 if effect == "logptotinc"
		replace id = 2 if effect == "recomposed"
		replace id = 3 if effect == "Place"
		replace id = 4 if effect == "Age"
		replace id = 5 if effect == "Individ"
		replace id = 6 if effect == "RelYear"
		replace id = 7 if effect == "Place/Age"
		replace id = 8 if effect == "Place/Individ"
		replace id = 9 if effect == "Place/RelYear"
		replace id = 10 if effect == "Age/Individ"
		replace id = 11 if effect == "Age/RelYear"
		replace id = 12 if effect == "Individ/RelYear"

		bys spec: gen temp = CoVar_ if effect == "recomposed"
		egen temp2 = max(temp), by(spec)
		by spec: gen Var_share = CoVar_ / temp2
		
		gen sample = "`x'"
		
		gen N_UR = `N_UR'

		order sample spec, first 
		gsort sample -spec id
		order N_UR, last
		drop id temp temp2
		
		save "${temp}/card_`x'_physicians", replace
	}

	use "${temp}/card_ALL_physicians", clear 

	foreach x in SPECIAL PCPDOCS RESTRICSPEC RANKED NONRANKED {
		append using "${temp}/card_`x'_physicians"
	}

	save "${output}/twfe/geo_twfe/physicians/geotwfe04-earnings_decomp_card_controls_physicians.dta", replace
